Import Libraries and dataset¶
In [20]:
import pandas as pd
import os
import altair as alt
os.getcwd()
data = pd.read_csv('Denton_County.csv')
data.head(1)
Out[20]:
| # | ML # | Property Sub Type | Mls Status | Standard Status | Address | City | Subdivision Name | Days On Market | CDOM | ... | Lot Size | # Parking Spaces | High School Name | School District | Middle School Name | Elementary School Name | Close Date | Close Price | Acres | Current Price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 20156700 | Single Family | Closed | Closed | 16204 Shawnee Trail | Fort Worth | Harriet Creek Ranch Ph 1 | 639 | 639 | ... | Less Than .5 Acre (not Zero) | NaN | Northwest | Northwest ISD | Pike | Hatfield | 45448 | 302500 | 0.137 | 302500 |
1 rows × 30 columns
Convert columns to numeric¶
In [21]:
data.dtypes
cols = ['SqFt','Original List Price','Close Price','Acres']
data[cols]=data[cols].apply(pd.to_numeric,errors='coerce',axis=1)
In [22]:
data.dtypes
Out[22]:
# int64 ML # int64 Property Sub Type object Mls Status object Standard Status object Address object City object Subdivision Name object Days On Market int64 CDOM int64 SqFt float64 Pool YN object Beds Total int64 Bath Total int64 Original List Price float64 Waterfront YN object HOA Fee float64 Fencing object Flooring object HOA Fee Includes object Lot Size object # Parking Spaces float64 High School Name object School District object Middle School Name object Elementary School Name object Close Date int64 Close Price float64 Acres float64 Current Price int64 dtype: object
Scatter Plot to show the relationship between Sale Price with living area and lot size¶
In [27]:
alt.Chart(data).mark_circle().encode(x="SqFt", y = "Close Price",color='School District')
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[27]:
In [40]:
# FIlter to only houses less than 1.2 million
data = data[data['Close Price']<=1200000]
alt.Chart(data).mark_circle().encode(x="SqFt", y = "Close Price",
color=alt.Color('School District',scale=alt.Scale(scheme='spectral')),
size="Acres").properties(width=800,height=300)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False) C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``. col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[40]:
In [60]:
# Filter to acres less than 0.8 acre
data = data[data['Acres']<=0.8]
selection = alt.selection(type='multi', fields=['School District'], on='mouseover', nearest=True)
alt.Chart(data).mark_circle().encode(x="SqFt", y = "Close Price",
color=alt.Color('School District',
scale=alt.Scale(scheme='spectral')),
size="Acres").properties(width=800,height=300)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection' is deprecated.
Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\vegalite\v5\api.py:425: AltairDeprecationWarning: The types 'single' and 'multi' are now
combined and should be specified using "selection_point()".
warnings.warn(
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[60]:
Scatterplot to see a comparison between living area and house prices. This is an interactive plot where you can select different school districts and see the results¶
In [61]:
selection = alt.selection(type='multi', fields=['School District'])
alt.Chart(data).mark_circle().encode(x="SqFt", y = "Close Price",
color=alt.Color('School District',
scale=alt.Scale(scheme='spectral')),
size="Acres",
opacity=alt.condition(selection,alt.value(1),alt.value(.2))).add_selection(selection).properties(height=300,width=800)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection' is deprecated.
Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\vegalite\v5\api.py:425: AltairDeprecationWarning: The types 'single' and 'multi' are now
combined and should be specified using "selection_point()".
warnings.warn(
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'add_selection' is deprecated. Use 'add_params' instead.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[61]:
Scatterplot to see a comparison between lot sizes and house prices. This is an interactive plot where you can select different school districts and see results¶
In [62]:
selection = alt.selection(type='multi', fields=['School District'])
alt.Chart(data).mark_circle().encode(x="Acres", y = "Close Price",
color=alt.Color('School District',
scale=alt.Scale(scheme='spectral')),
opacity=alt.condition(selection,alt.value(1),alt.value(.2))).add_selection(selection).properties(height=300,width=800)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection' is deprecated.
Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\vegalite\v5\api.py:425: AltairDeprecationWarning: The types 'single' and 'multi' are now
combined and should be specified using "selection_point()".
warnings.warn(
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'add_selection' is deprecated. Use 'add_params' instead.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[62]:
Difference in house prices based on school district¶
In [74]:
data['price/sqft'] = data['Close Price']/data['SqFt']
C:\Users\sorab\AppData\Local\Temp\ipykernel_18592\3918093738.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy data['price/sqft'] = data['Close Price']/data['SqFt']
In [85]:
# Let's implement filtering using dynamic queries.
data['price/sqft'] = data['price/sqft'].apply(lambda x: round(x, 2))
selection = alt.selection(type="multi", fields=["School District"])
# Create a container for our two different views
base = alt.Chart(data).properties(width=500, height=250)
# Let's specify our overview chart
overview = alt.Chart(data).mark_bar().encode(
y = "mean(price/sqft)",
x = alt.X(field='School District', type='nominal', sort=alt.EncodingSortField(field='School District', op='mean')),
tooltip=["School District","price/sqft"],
color=alt.condition(selection, alt.value("orange"), alt.value("lightgrey"))
).add_selection(selection).properties(height=250, width=250)
# Create a detail chart
detail = hist = base.mark_bar().encode(y = "mean(price/sqft)",
x = alt.X(field='City', type='nominal',
sort=alt.EncodingSortField(field='School District', op='mean')),
tooltip=["City",'price/sqft']).transform_filter(selection).properties(height=250, width=250)
overview | detail
C:\Users\sorab\AppData\Local\Temp\ipykernel_18592\4136440937.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
data['price/sqft'] = data['price/sqft'].apply(lambda x: round(x, 2))
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection' is deprecated.
Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\vegalite\v5\api.py:425: AltairDeprecationWarning: The types 'single' and 'multi' are now
combined and should be specified using "selection_point()".
warnings.warn(
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'add_selection' is deprecated. Use 'add_params' instead.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[85]:
In [86]:
## Diff in prices based on pool
In [92]:
# Let's implement filtering using dynamic queries.
data['price/sqft'] = data['price/sqft'].apply(lambda x: round(x, 2))
data2 = data[data['Pool YN'].str.len()>0]
data2.head
selection = alt.selection(type="multi", fields=["School District"])
# Create a container for our two different views
base = alt.Chart(data2).properties(width=500, height=250)
# Let's specify our overview chart
overview = alt.Chart(data2).mark_bar().encode(
y = "mean(price/sqft)",
x = alt.X(field='School District', type='nominal', sort=alt.EncodingSortField(field='School District', op='mean')),
tooltip=["School District","price/sqft"],
color=alt.condition(selection, alt.value("orange"), alt.value("lightgrey"))
).add_selection(selection).properties(height=250, width=250)
# Create a detail chart
detail = hist = base.mark_bar().encode(y = "mean(price/sqft)",
x = alt.X(field='Pool YN', type='nominal',
sort=alt.EncodingSortField(field='School District', op='mean')),
tooltip=["Pool YN",'price/sqft']).transform_filter(selection).properties(height=250, width=250)
overview | detail
C:\Users\sorab\AppData\Local\Temp\ipykernel_18592\1271744573.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
data['price/sqft'] = data['price/sqft'].apply(lambda x: round(x, 2))
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'selection' is deprecated.
Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\vegalite\v5\api.py:425: AltairDeprecationWarning: The types 'single' and 'multi' are now
combined and should be specified using "selection_point()".
warnings.warn(
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\deprecation.py:65: AltairDeprecationWarning: 'add_selection' is deprecated. Use 'add_params' instead.
warnings.warn(message, AltairDeprecationWarning, stacklevel=1)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
C:\Users\sorab\anaconda3\Lib\site-packages\altair\utils\core.py:395: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
Out[92]:
In [ ]: